import os
import csv
import locale
from numpy import *
from scipy.interpolate import interp1d
from pylab import *

from matplotlib.ticker import NullFormatter


# Root directory of the Bioanalyzer exports. read_ladder() and read_data()
# look for their CSV files in <directory>/<dataset>/.
directory = "/osc-fs_home/mdehoon/Data/CASPARs/MiSeq/Bioanalyzer/"

# read_ladder() parses the ladder sizes with locale.atoi(), which follows the
# locale selected here.
# NOTE(review): presumably needed because sizes are written with thousands
# separators (e.g. "10,380") - confirm against the exported files.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') 

def read_ladder(dataset):
    """Build a migration-time -> fragment-size interpolator from the ladder.

    Reads ``<directory>/<dataset>/<dataset>_Results.csv``, skips ahead to the
    ``Sample Name,Ladder`` row, then to the ``Peak Table`` row that follows it,
    and collects the first column (``Size [bp]``) and the sixth column
    (``Aligned Migration Time [s]``) of every table row. The table ends at the
    first row with at most one field.

    Parameters
    ----------
    dataset : str
        Name of the run; used both as sub-directory of ``directory`` and as
        prefix of the results file name.

    Returns
    -------
    scipy.interpolate.interp1d
        Linear interpolator mapping aligned migration time to size; values
        outside the range of the ladder peaks are extrapolated.

    Raises
    ------
    ValueError
        If the peak table header differs from the expected one, if a table
        cell cannot be parsed as a number, or if fewer than two ladder peaks
        were found.
    """
    expected_header = ['Size [bp]', 'Conc. [pg/µl]', 'Molarity [pmol/l]', 'Observations', 'Area', 'Aligned Migration Time [s]', 'Peak Height', 'Peak Width', '% of Total', 'Time corrected area']
    filename = "%s_Results.csv" % dataset
    path = os.path.join(directory, dataset, filename)
    print("Reading", path)
    sizes = []
    times = []
    sample_found = False
    table_found = False
    table_started = False
    with open(path, encoding='latin1') as stream:
        for row in csv.reader(stream):
            if not row:
                # csv.reader yields [] for a blank line: it ends the peak
                # table once the table has started and is ignored otherwise.
                if table_started:
                    break
                continue
            if row == ['Sample Name', 'Ladder']:
                sample_found = True
            elif not sample_found:
                continue
            elif row == ['Peak Table']:
                table_found = True
            elif not table_found:
                continue
            elif not table_started:
                # Explicit check instead of assert, which "python -O" removes.
                if row != expected_header:
                    raise ValueError("Unexpected peak table header in %s: %r"
                                     % (path, row))
                table_started = True
            elif len(row) == 1:
                # A single-field row marks the end of the peak table.
                break
            else:
                sizes.append(locale.atoi(row[0]))
                times.append(float(row[5]))
    if len(times) < 2:
        # interp1d would raise a ValueError here as well, but without saying
        # which file is at fault.
        raise ValueError("Fewer than two ladder peaks found in %s" % path)
    return interp1d(array(times), array(sizes), kind='linear',
                    fill_value='extrapolate')

def read_data(dataset, sample, name):
    """Read the trace (time, value) of one sample of a run.

    Reads ``<directory>/<dataset>/<dataset>_<sample>.csv``. Rows before the
    ``Time,Value`` header are skipped, except that a ``Sample Name`` row must
    carry the expected sample name. Rows after the header are parsed as
    ``time, value`` pairs until a row with at most one field is found.

    Parameters
    ----------
    dataset : str
        Name of the run; sub-directory of ``directory`` and file-name prefix.
    sample : str
        File-name suffix of the sample, e.g. ``"Sample1"``.
    name : str
        Sample name expected in the ``Sample Name`` row of the file.

    Returns
    -------
    times, values : numpy.ndarray
        Two arrays of equal length; both empty if the file has no data rows.

    Raises
    ------
    ValueError
        If the ``Sample Name`` row does not carry ``name``, or if a data cell
        cannot be parsed as a float.
    """
    filename = "%s_%s.csv" % (dataset, sample)
    path = os.path.join(directory, dataset, filename)
    print("Reading", path)
    times = []
    values = []
    data_found = False
    with open(path, encoding='latin1') as stream:
        for row in csv.reader(stream):
            if not row:
                # csv.reader yields [] for a blank line; indexing it would
                # raise IndexError. It ends the data section once that has
                # started and is ignored before.
                if data_found:
                    break
                continue
            if row == ['Time', 'Value']:
                data_found = True
            elif row[0] == "Sample Name":
                # Explicit check instead of assert, which "python -O" removes.
                if len(row) < 2 or row[1] != name:
                    raise ValueError("Expected sample name %r in %s, found %r"
                                     % (name, path, row[1:]))
            elif not data_found:
                continue
            elif len(row) == 1:
                # A single-field row marks the end of the data section.
                break
            else:
                times.append(float(row[0]))
                values.append(float(row[1]))
    return array(times), array(values)


# Names of the Bioanalyzer runs. Each name is both a sub-directory of
# `directory` and the prefix of the CSV files inside it (see read_ladder() and
# read_data()). The code below refers to the runs by index: datasets[0],
# datasets[1] and datasets[2].
datasets = ("2100 expert_High Sensitivity DNA Assay_DE72901295_2015-04-07_15-06-16",
            "2100 expert_High Sensitivity DNA Assay_DE72902751_2015-04-07_15-12-15",
            "final_2015-04-09_15-40-19",
           )



# Figure 1: every trace of the first two runs on a logarithmic size axis,
# with the 200 bp and 400 bp size limits marked.
fig = figure()

samples = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")
# For each run: the sample names expected in its CSV files, in the order of
# `samples`.
# NOTE(review): the second tuple used to list "32" and "33" as well; zip()
# silently dropped them because only nine sample files are listed. Confirm
# whether Sample10 and Sample11 of that run should be plotted too.
runs = ((datasets[0], ("1", "2", "3", "4", "5", "6", "7", "8", "9")),
        (datasets[1], ("23", "24", "25", "26", "27", "28", "29", "30", "31")),
       )

# Only the first curve carries the label, so the legend gets a single entry
# for all traces.
label = "Before size\nselection"
for dataset, names in runs:
    # The ladder of each run converts its migration times into sizes.
    interpolator = read_ladder(dataset)
    xmin = interpolator.x.min()
    xmax = interpolator.x.max()
    for sample, name in zip(samples, names):
        times, values = read_data(dataset, sample, name)
        # Keep the part of the trace that starts at the first ladder peak and
        # ends one point past the last ladder peak.
        start = times.searchsorted(xmin)
        end = times.searchsorted(xmax) + 1
        trace_sizes = interpolator(times[start:end])
        semilogx(trace_sizes, values[start:end], color='blue', alpha=0.5, label=label)
        label = None

# Remember the automatic y-range: the marker lines are drawn across it and
# the same range is set explicitly at the end.
ymin, ymax = ylim()
plot([200, 200], [ymin, ymax], 'r--')
plot([400, 400], [ymin, ymax], 'r--', label='Selected\nsize limits')
tick_sizes = (50, 100, 200, 300, 400, 600, 1000, 2000, 4000, 7000)
tick_labels = [str(size) for size in tick_sizes]
xticks(tick_sizes, tick_labels, fontsize=8)
yticks(fontsize=8)
xlabel("Size [bp]")
ylabel("Fluorescence units")
legend()
xlim(tick_sizes[0], tick_sizes[-1])
ylim(ymin, ymax)

for filename in ("figure_bioanalyzer_miseq.svg", "figure_bioanalyzer_miseq.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)

# Figure 2: the same traces as the overview figure on a linear size axis,
# restricted to 180-420 bp around the 200 bp and 400 bp size limits.
fig = figure()

samples = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")
# For each run: the sample names expected in its CSV files, in the order of
# `samples`.
# NOTE(review): the second tuple used to list "32" and "33" as well; zip()
# silently dropped them because only nine sample files are listed. Confirm
# whether Sample10 and Sample11 of that run should be plotted too.
runs = ((datasets[0], ("1", "2", "3", "4", "5", "6", "7", "8", "9")),
        (datasets[1], ("23", "24", "25", "26", "27", "28", "29", "30", "31")),
       )

# Only the first curve carries the label, so the legend gets a single entry
# for all traces.
label = "Before size selection"
for dataset, names in runs:
    # The ladder of each run converts its migration times into sizes.
    interpolator = read_ladder(dataset)
    xmin = interpolator.x.min()
    xmax = interpolator.x.max()
    for sample, name in zip(samples, names):
        times, values = read_data(dataset, sample, name)
        # Keep the part of the trace that starts at the first ladder peak and
        # ends one point past the last ladder peak.
        start = times.searchsorted(xmin)
        end = times.searchsorted(xmax) + 1
        trace_sizes = interpolator(times[start:end])
        plot(trace_sizes, values[start:end], color='blue', alpha=0.5, label=label)
        label = None

tick_sizes = (200, 250, 300, 350, 400)
tick_labels = [str(size) for size in tick_sizes]
xticks(tick_sizes, tick_labels, fontsize=8)
yticks(fontsize=8)

# Fixed y-range for the zoomed view; the marker lines span exactly this range.
ymin, ymax = -3, 6
plot([200, 200], [ymin, ymax], 'r--')
plot([400, 400], [ymin, ymax], 'r--', label='Selected size limits')
xlabel("Size [bp]")
ylabel("Fluorescence units")

xlim(180, 420)
ylim(ymin, ymax)
legend(bbox_to_anchor=(0.1, 0.16))

for filename in ("figure_bioanalyzer_miseq_zoomed.svg", "figure_bioanalyzer_miseq_zoomed.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)


# Figure 3: one panel per sample in a 6 x 3 grid, logarithmic size axis.
fig = figure(figsize=(6,12))

# Full-figure axes without spines or ticks; they only carry the axis labels
# shared by all panels.
ax = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
ax.set_xlabel("Size [bp]")
ax.set_ylabel("Fluorescence units", labelpad=27)

timepoints = ("0 hours", "1 hour", "4 hours", "12 hours", "24 hours", "96 hours")

samples = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")

# Panels 1-9: the nine samples of the first run, three per row.
interpolator = read_ladder(datasets[0])
xmin = interpolator.x.min()
xmax = interpolator.x.max()
names = ("1", "2", "3", "4", "5", "6", "7", "8", "9")

for i, (sample, name) in enumerate(zip(samples, names)):
    times, values = read_data(datasets[0], sample, name)
    # Keep the part of the trace that starts at the first ladder peak and
    # ends one point past the last ladder peak.
    start = times.searchsorted(xmin)
    end = times.searchsorted(xmax) + 1
    values = values[start:end]
    sizes = interpolator(times[start:end])
    ax = fig.add_subplot(6,3,i+1)
    semilogx(sizes, values, color='blue', alpha=0.5)
    yticks(fontsize=8)
    # The marker lines span the automatic y-range, which is then fixed.
    ymin, ymax = ylim()
    plot([200, 200], [ymin, ymax], 'r--')
    plot([400, 400], [ymin, ymax], 'r--')
    xlim(sizes[0], sizes[-1])
    ylim(ymin, ymax)
    xticks([])
    if i % 3 == 0:
        # First panel of a row: label the row with its time point.
        ylabel(timepoints[i // 3], fontsize=8)
    if i < 3:
        title("Replicate %d" % (i+1), fontsize=8)

# Panels 10-18: samples of the second run, in the order of `sorted_names`.
interpolator = read_ladder(datasets[1])
xmin = interpolator.x.min()
xmax = interpolator.x.max()
names = ("23", "24", "25", "26", "27", "28", "29", "30", "31")

# Panel title for each sample name.
conditions = {"19": "STAT6 PROMPT replicate 1",     # c91_r1
              "20": "STAT6 PROMPT replicate 2",     # c91_r2
              "21": "STAT6 PROMPT replicate 3",     # c91_r3
              "22": "NC_A (STAT6) replicate 1",     # n91_r1
              "23": "NC_A (STAT6) replicate 2",     # n91_r2
              "24": "NC_A (STAT6) replicate 3",     # n91_r3
              "25": "Lipofectamine only",           # lip_r1
              "26": "Untreated cells",              # cel_r1
              "27": "Protocol negative control",    # neg_r1
              "28": "MYB CASPAR replicate 1",       # myb_r1
              "29": "MYB CASPAR replicate 2",       # myb_r2
              "30": "MYB CASPAR replicate 3",       # myb_r3
              "31": "GFI1 PROMPT replicate 1",      # gfi_r1
              "32": "GFI1 PROMPT replicate 2",      # gfi_r2
              "33": "GFI1 PROMPT replicate 3",      # gfi_r3
              "34": "NC_A (MYB/GFI1) replicate 1",  # 34 nkd_r1
              "35": "NC_A (MYB/GFI1) replicate 2",  # 35 nkd_r2
              "36": "NC_A (MYB/GFI1) replicate 3",  # 36 nkd_r3
             }

sorted_names = ("27",  # Protocol negative control
                "26",  # Untreated cells
                "25",  # Lipofectamine only
                "23",  # NC_A (STAT6) replicate 2
                "24",  # NC_A (STAT6) replicate 3
                "28",  # MYB CASPAR replicate 1
                "29",  # MYB CASPAR replicate 2
                "30",  # MYB CASPAR replicate 3
                "31",  # GFI1 PROMPT replicate 1
               )

# Tick positions have their own name. Storing them in `sizes` replaced the
# trace sizes on the bottom row, so xlim() below gave that row different
# limits than every other panel.
tick_sizes = (50, 100, 300, 2000, 7000)
tick_labels = [str(size) for size in tick_sizes]

# Continue numbering the panels after those of the first run.
for i, name in enumerate(sorted_names, start=len(samples)):
    sample = samples[names.index(name)]
    times, values = read_data(datasets[1], sample, name)
    start = times.searchsorted(xmin)
    end = times.searchsorted(xmax) + 1
    values = values[start:end]
    sizes = interpolator(times[start:end])
    ax = fig.add_subplot(6,3,i+1)
    semilogx(sizes, values, color='blue', alpha=0.5)
    yticks(fontsize=8)
    ymin, ymax = ylim()
    plot([200, 200], [ymin, ymax], 'r--')
    plot([400, 400], [ymin, ymax], 'r--')
    title(conditions[name], fontsize=8, pad=2)
    if i >= 15:
        # Only the bottom row shows size tick labels.
        xticks(tick_sizes, tick_labels, fontsize=8)
    else:
        xticks([])
    xlim(sizes[0], sizes[-1])
    ylim(ymin, ymax)

subplots_adjust(bottom=0.08, top=0.97, left=0.14, right=0.97, wspace=0.3)

for filename in ("figure_bioanalyzer_miseq_separate.svg", "figure_bioanalyzer_miseq_separate.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)


# Figure 4: one panel per sample in a 6 x 3 grid, linear size axis restricted
# to 180-420 bp. Uses `conditions` and `sorted_names` defined earlier in the
# file.
fig = figure(figsize=(6,12))

# Full-figure axes without spines or ticks; they only carry the axis labels
# shared by all panels.
ax = fig.add_subplot(111)
for side in ('top', 'bottom', 'left', 'right'):
    ax.spines[side].set_color('none')
ax.tick_params(labelcolor='w', top=False, bottom=False, left=False, right=False)
ax.set_xlabel("Size [bp]")
ax.set_ylabel("Fluorescence units", labelpad=27)

timepoints = ("0 hours", "1 hour", "4 hours", "12 hours", "24 hours", "96 hours")

samples = ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5", "Sample6", "Sample7", "Sample8", "Sample9")

# Panels 1-9: the nine samples of the first run, three per row.
interpolator = read_ladder(datasets[0])
xmin = interpolator.x.min()
xmax = interpolator.x.max()
names = ("1", "2", "3", "4", "5", "6", "7", "8", "9")

for i, (sample, name) in enumerate(zip(samples, names)):
    times, values = read_data(datasets[0], sample, name)
    # Keep the part of the trace that starts at the first ladder peak and
    # ends one point past the last ladder peak.
    first = times.searchsorted(xmin)
    last = times.searchsorted(xmax) + 1
    values = values[first:last]
    sizes = interpolator(times[first:last])
    ax = fig.add_subplot(6,3,i+1)
    # Part of the trace that falls inside the displayed 180-420 bp window.
    window = slice(searchsorted(sizes, 180), searchsorted(sizes, 420))
    shown = values[window]
    plot(sizes[window], shown, color='blue', alpha=0.5)
    yticks(fontsize=8)
    # 10% headroom above the maximum; below, at least 1/20 of the upper limit.
    ymax = 1.1 * max(shown)
    ymin = min(-ymax / 20.0, 1.1 * min(shown))
    plot([200, 200], [ymin, ymax], 'r--')
    plot([400, 400], [ymin, ymax], 'r--')
    xlim(180, 420)
    ylim(ymin, ymax)
    ax.xaxis.set_minor_formatter(NullFormatter())
    ax.xaxis.set_major_formatter(NullFormatter())
    if i % 3 == 0:
        # First panel of a row: label the row with its time point.
        ylabel(timepoints[i // 3], fontsize=8)
    if i < 3:
        title("Replicate %d" % (i+1), fontsize=8)
# The panels of the second run are numbered after those drawn so far.
i += 1

interpolator = read_ladder(datasets[1])
xmin = interpolator.x.min()
xmax = interpolator.x.max()
names = ("23", "24", "25", "26", "27", "28", "29", "30", "31")

for i, name in enumerate(sorted_names, start=i):
    sample = samples[names.index(name)]
    times, values = read_data(datasets[1], sample, name)
    first = times.searchsorted(xmin)
    last = times.searchsorted(xmax) + 1
    values = values[first:last]
    sizes = interpolator(times[first:last])
    ax = fig.add_subplot(6,3,i+1)
    window = slice(searchsorted(sizes, 180), searchsorted(sizes, 420))
    shown = values[window]
    plot(sizes[window], shown, color='blue', alpha=0.5)
    yticks(fontsize=8)
    ymax = 1.1 * max(shown)
    ymin = min(-ymax / 20.0, 1.1 * min(shown))
    plot([200, 200], [ymin, ymax], 'r--')
    plot([400, 400], [ymin, ymax], 'r--')
    xlim(180, 420)
    ax.xaxis.set_minor_formatter(NullFormatter())
    if i >= 15:
        # Only the bottom row shows size tick labels.
        tick_sizes = (200, 300, 400)
        tick_labels = [str(size) for size in tick_sizes]
        xticks(tick_sizes, tick_labels, fontsize=8)
    else:
        ax.xaxis.set_major_formatter(NullFormatter())
    title(conditions[name], fontsize=8, pad=2)
    ylim(ymin, ymax)

subplots_adjust(bottom=0.08, top=0.97, left=0.14, right=0.97, wspace=0.3)

for filename in ("figure_bioanalyzer_miseq_separate_zoomed.svg", "figure_bioanalyzer_miseq_separate_zoomed.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)


# Figure 5: one sample before and one after size selection, both taken from
# the third run, on a logarithmic size axis. sizes1/values1 and sizes2/values2
# are reused by the zoomed figure that follows.
interpolator = read_ladder(datasets[2])
# Range of the ladder's migration times (seconds); used only to trim traces.
xmin = interpolator.x.min()
xmax = interpolator.x.max()


times1, values1 = read_data(datasets[2], "Sample1", "before PP_sample8")
# Keep the part of the trace that starts at the first ladder peak and ends
# one point past the last ladder peak.
start = times1.searchsorted(xmin)
end = times1.searchsorted(xmax) + 1
times1 = times1[start:end]
values1 = values1[start:end]
sizes1 = interpolator(times1)

times2, values2 = read_data(datasets[2], "Sample2", "final (36mix, 200-400)")
start = times2.searchsorted(xmin)
end = times2.searchsorted(xmax) + 1
times2 = times2[start:end]
values2 = values2[start:end]
sizes2 = interpolator(times2)

figure()
semilogx(sizes1, values1, label="Before size selection;\n4 hours, replicate 2", color='blue')
semilogx(sizes2, values2, label="After size selection;\n36-mix library", color='purple')
# The marker lines span the automatic y-range, which is fixed again below.
ymin, ymax = ylim()
plot([200, 200], [ymin, ymax], 'r--')
plot([400, 400], [ymin, ymax], 'r--', label='Selected size limits')
tick_sizes = (50, 100, 200, 300, 400, 600, 1000, 2000, 4000, 7000)
tick_labels = [str(size) for size in tick_sizes]
xticks(tick_sizes, tick_labels, fontsize=8)
yticks(fontsize=8)
xlabel("Size [bp]")
ylabel("Fluorescence units")
legend()
# The x-axis is in bp, so limit it to the tick range as in the overview
# figure. xmin and xmax are migration times and must not be used here.
xlim(tick_sizes[0], tick_sizes[-1])
ylim(ymin, ymax)

for filename in ("figure_bioanalyzer_miseq_before_after.svg", "figure_bioanalyzer_miseq_before_after.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)

# Figure 6: the before/after traces (sizes1/values1 and sizes2/values2 from
# the previous figure) between 180 and 420 bp, each on its own y-axis.
fig = figure()
plot(sizes1, values1, label="Before size\nselection; 4 hours,\nreplicate 2", color='blue')
ylabel("Fluorescence units", color='blue')
yticks(fontsize=8, color='blue')

tick_sizes = (200, 250, 300, 350, 400)
tick_labels = [str(size) for size in tick_sizes]
xticks(tick_sizes, tick_labels, fontsize=8)
xlabel("Size [bp]")
xlim(180, 420)
# Extend the axis below zero by 1/20 of the upper limit.
ymin, ymax = ylim()
ylim(-ymax/20, ymax)

# Second y-axis for the trace after size selection. The pyplot calls that
# follow are meant for these twin axes.
# NOTE(review): this relies on twinx() making the new axes current - confirm.
ax2 = fig.axes[0].twinx()
ax2.plot(sizes2, values2, label="After size selection;\n36-mix library", color='purple')
ylabel("Fluorescence units", color='purple')
yticks(fontsize=8, color='purple')
xticks(tick_sizes, tick_labels, fontsize=8)
xlim(180, 420)
ymin, ymax = ylim()
ylim(-ymax/20, ymax)

# Marker lines across the full (just adjusted) y-range.
ymin, ymax = ylim()
plot([200, 200], [ymin, ymax], 'r--')
plot([400, 400], [ymin, ymax], 'r--', label='Selected size limits')

# One legend for the curves of both axes.
handles1, labels1 = fig.axes[0].get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(handles1 + handles2, labels1 + labels2, bbox_to_anchor=(0.08,1), loc='upper left')

for filename in ("figure_bioanalyzer_miseq_before_after_zoomed.svg", "figure_bioanalyzer_miseq_before_after_zoomed.png"):
    print("Saving figure as %s" % filename)
    savefig(filename)
